# analysis_section_4_3_part2.R
# This script replicates the analysis in Section 4.3 (Part 2) of the paper
#   Table 2 and Table SI.13
# Author: Yimeng Li
# Dependencies:
#   Data/VR_VoteCal_LA.Rdata

# Load libraries:
library(dplyr)
library(MatchIt)
library(purrr)
library(sandwich)
library(tidyr)

# Load data:
load("./Data/VR_VoteCal_LA.Rdata")

# Prepare data for analysis
VR_VoteCal_LA <- VR_VoteCal_LA %>%
  mutate(
    Age_18_to_29 = if_else(Age >= 18 & Age <= 29, 1, 0),
    Age_30_to_44 = if_else(Age >= 30 & Age <= 44, 1, 0),
    Age_45_to_64 = if_else(Age >= 45 & Age <= 64, 1, 0),
    Age_65_or_older = if_else(Age >= 65, 1, 0),
    Age_cat = case_when(Age_18_to_29 == 1 ~ "18 to 29",
                        Age_30_to_44 == 1 ~ "30 to 44",
                        Age_45_to_64 == 1 ~ "45 to 64",
                        Age_65_or_older == 1 ~ "65 or old",
                        TRUE ~ NA_character_),
    Age_cat = factor(Age_cat, levels = c("18 to 29", "30 to 44", "45 to 64", "65 or old")),
    
    Male = if_else(gender_inf < 0.5, 1, 0),
    Female = if_else(gender_inf >= 0.5, 1, 0),
    Gender_cat = case_when(Male == 1 ~ "Male",
                           Female == 1 ~ "Female",
                           TRUE ~ NA_character_),
    Gender_cat = factor(Gender_cat, levels = c("Male", "Female")),
    
    White = if_else(pred.whi >= pred.whi & pred.whi > pred.bla & pred.whi > pred.his & pred.whi > pred.asi & pred.whi > pred.oth, 1, 0),
    Black = if_else(pred.bla > pred.whi & pred.bla >= pred.bla & pred.bla > pred.his & pred.bla > pred.asi & pred.bla > pred.oth, 1, 0),
    Hispanic = if_else(pred.his > pred.whi & pred.his > pred.bla & pred.his >= pred.his & pred.his > pred.asi & pred.his > pred.oth, 1, 0),
    Asian = if_else(pred.asi > pred.whi & pred.asi > pred.bla & pred.asi > pred.his & pred.asi >= pred.asi & pred.asi > pred.oth, 1, 0),
    Other = if_else(pred.oth > pred.whi & pred.oth > pred.bla & pred.oth > pred.his & pred.oth > pred.asi & pred.oth >= pred.oth, 1, 0),
    Race_cat = case_when(White == 1 ~ "White",
                         Black == 1 ~ "Black",
                         Hispanic == 1 ~ "Hispanic",
                         Asian == 1 ~ "Asian",
                         Other == 1 ~ "Other",
                         TRUE ~ NA_character_),
    Race_cat = factor(Race_cat, levels = c("White", "Black", "Hispanic", "Asian", "Other"))
  )

Voters_NPAV <- VR_VoteCal_LA %>%
  filter(IsPermanentVBM == "Non-Perm. VBM", RegisteredBefore2016Pri == 1, !is.na(Dist)) %>%
  select(Row_No, Turnout.2020, Turnout.2016, UVBM, Dist, abs_dist,
         Age_cat, Gender_cat, Race_cat, median_value, School_District)

Voters_PAV <- VR_VoteCal_LA %>%
  filter(IsPermanentVBM == "Perm. VBM", RegisteredBefore2016Pri == 1, !is.na(Dist)) %>%
  select(Row_No, Turnout.2020, Turnout.2016, UVBM, Dist, abs_dist,
         Age_cat, Gender_cat, Race_cat, median_value, School_District)

#*******************************************************************************
# Analysis - Define Functions
#*******************************************************************************

# This function runs the difference-in-differences analysis with matching given
#   PVBM: Permanent VBM status (Non-Perm. VBM or Perm. VBM)
#   Spec: Specification (demo, demo_housing, demo_housing_schools)
#   Distance: Maximal Distance from the Boundary in meters (10000, 5000, 2000, 1000, 500)
get_matching_est <- function(PVBM, Spec, Distance) {
  
  if (PVBM == "Non-Perm. VBM") {
    
    data_for_matching <- Voters_NPAV %>% filter(abs_dist <= Distance)
    
  } else if (PVBM == "Perm. VBM") {
    
    data_for_matching <- Voters_PAV %>% filter(abs_dist <= Distance)
    
  }
  
  if (Spec == "demo") {
    
    m.out <- matchit(UVBM ~ Age_cat + Gender_cat + Race_cat,
                     data = data_for_matching %>%
                       filter(!is.na(Age_cat), !is.na(Gender_cat), !is.na(Race_cat)),
                     method = "exact")
    
  } else if (Spec == "demo_housing") {
    
    m.out <- matchit(UVBM ~ Age_cat + Gender_cat + Race_cat + median_value,
                     data = data_for_matching %>%
                       filter(!is.na(Age_cat), !is.na(Gender_cat), !is.na(Race_cat),
                              !is.na(median_value)),
                     method = "cem")
    
  } else if (Spec == "demo_housing_schools") {
    
    m.out <- matchit(UVBM ~ Age_cat + Gender_cat + Race_cat + median_value + School_District,
                     data = data_for_matching %>%
                       filter(!is.na(Age_cat), !is.na(Gender_cat), !is.na(Race_cat),
                              !is.na(median_value), !is.na(School_District)),
                     method = "cem")
    
  }
  
  m.summary <- summary(m.out)
  
  m.data <- match.data(m.out) %>%
    pivot_longer(cols = c("Turnout.2020", "Turnout.2016"),
                 names_to = c(".value", "Year"),
                 names_sep = "[.]")
  
  m.lm <- lm(Turnout ~ I(UVBM == 1) + I(Year == "2020") + I(UVBM == 1)*I(Year == "2020"),
             data = m.data, weights = weights)
  
  m.estimates <- data.frame(
    term = attributes(m.lm$coefficients)$names,
    coef = as.numeric(m.lm$coefficients),
    se = as.numeric(sqrt(diag(vcovCL(m.lm, cluster = ~ Dist))))
  ) %>%
    mutate(PVBM = PVBM, Spec = Spec, Distance = Distance)
  
  return(list(m.summary, m.estimates))
}

#*******************************************************************************
# Difference-in-Differences Analysis with Matched Subsets of Voters Close to the Boundary
#   Results Reported in Table 2 and Table SI.13
#*******************************************************************************

# Parameter combinations
matching_parameters <- expand.grid(
  Distance = c(10000, 5000, 2000, 1000, 500),
  Spec = c("demo", "demo_housing", "demo_housing_schools"),
  PVBM = c("Non-Perm. VBM", "Perm. VBM")
) %>% relocate(PVBM, Spec, Distance)

# Run matching for all parameter combinations
matching_estimates <- pmap(matching_parameters, ~ get_matching_est(..1, ..2, ..3))

# Save the results for future reference (to avoid re-running the analysis)
# save(matching_estimates, file = "./Replication Package/matching_estimates.RData")

#*******************************************************************************
# Table 2
#   Difference-in-Differences Estimates with Matched Subsets of Voters Close to the
#   Boundary of Universal and Non-Universal Vote-by-Mail Districts
#*******************************************************************************

# This function extracts coefficient estimates
get_coefs <- function(indices, estimates) {
  map_dbl(indices, ~ round(100*estimates[[.x]][[2]]$coef[4], 1))
}

# Table with the coefficient estimates
table_matching_estimates <- data.frame(
  Distance = c(10000, 5000, 2000, 1000, 500),
  coef_or_se = c("coef", "coef", "coef", "coef", "coef"),
  NPAV_Model1 = get_coefs(1:5, matching_estimates),
  NPAV_Model2 = get_coefs(6:10, matching_estimates),
  NPAV_Model3 = get_coefs(11:15, matching_estimates),
  PAV_Model1 = get_coefs(16:20, matching_estimates),
  PAV_Model2 = get_coefs(21:25, matching_estimates),
  PAV_Model3 = get_coefs(26:30, matching_estimates)
)

# This function extracts clustered standard errors
get_ses <- function(indices, estimates) {
  map_dbl(indices, ~ round(100*estimates[[.x]][[2]]$se[4], 1))
}

# Table with the clustered standard errors
table_matching_ses <- data.frame(
  Distance = c(10000, 5000, 2000, 1000, 500),
  coef_or_se = c("se", "se", "se", "se", "se"),
  NPAV_Model1 = get_ses(1:5, matching_estimates),
  NPAV_Model2 = get_ses(6:10, matching_estimates),
  NPAV_Model3 = get_ses(11:15, matching_estimates),
  PAV_Model1 = get_ses(16:20, matching_estimates),
  PAV_Model2 = get_ses(21:25, matching_estimates),
  PAV_Model3 = get_ses(26:30, matching_estimates)
)

# Full Table
Table2 <- bind_rows(table_matching_estimates, table_matching_ses) %>%
  arrange(desc(Distance))

# Save Table
write.csv(Table2, file = "./Replication Package/Table 2.csv", row.names = FALSE, na = '')

#*******************************************************************************
# Table 13
#   Difference-in-Differences Estimates with Matched Subsets of Voters Close to the
#   Boundary of Universal and Non-Universal Vote-by-Mail Districts
#*******************************************************************************

# This pair of functions extracts the average housing value before and after matching
get_avg_value_before <- function(indices, estimates) {
  map_dfr(indices, ~ round(estimates[[.x]][1][[1]]$sum.all["median_value", c("Means Treated", "Means Control")], 0))
}

get_avg_value_after <- function(indices, estimates) {
  map_dfr(indices, ~ round(estimates[[.x]][1][[1]]$sum.matched["median_value", c("Means Treated", "Means Control")], 0))
}

# Balance before and after matching for Non-Permanent VBM voters
table_NPAV_value_balance <- bind_rows(
  get_avg_value_before(6:10, matching_estimates),
  get_avg_value_after(6:15, matching_estimates)
) %>%
  transmute(
    Condition = rep(c("Before Matching",
                      "After Matching (demos + house value)",
                      "After Matching (demos + house value + school districts)"), each = 5),
    Distance = rep(c(10000, 5000, 2000, 1000, 500), 3),
    NPAV_Treatment = `Means Treated`,
    NPAV_Control = `Means Control`
  )

# Balance before and after matching for Permanent VBM voters
table_PAV_value_balance <- bind_rows(
  get_avg_value_before(21:25, matching_estimates),
  get_avg_value_after(21:30, matching_estimates)
) %>% transmute(
  Condition = rep(c("Before Matching",
                    "After Matching (demos + house value)",
                    "After Matching (demos + house value + school districts)"), each = 5),
  Distance = rep(c(10000, 5000, 2000, 1000, 500), 3),
  PAV_Treatment = `Means Treated`,
  PAV_Control = `Means Control`
)

# Full Table
Table_SI13 <- table_NPAV_value_balance %>%
  left_join(table_PAV_value_balance, by = c("Condition", "Distance"))

# Save Table
write.csv(Table_SI13, file = "./Replication Package/Table SI.13.csv", row.names = FALSE, na = '')
